# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle import _legacy_C_ops
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.layer_helper import LayerHelper
from paddle.tensor.linalg import matmul


def fused_matmul_bias(
    x, y, bias=None, transpose_x=False, transpose_y=False, name=None
):
    """
    Multiply two tensors and, when a bias is supplied, add it to the product.
    This method requires CUDA version >= 11.6.

    When ``bias`` is None the call is forwarded to the regular ``matmul``.
    Otherwise the ``fused_gemm_epilogue`` operator is used, either called
    directly (non-static mode) or appended to the program (static mode).

    Args:
        x (Tensor): the first input Tensor to be multiplied.
        y (Tensor): the second input Tensor to be multiplied. Its rank must be 2.
        bias (Tensor|None): the input bias Tensor. If it is None, no bias addition
            is performed. Otherwise, the bias is added to the matrix
            multiplication result.
        transpose_x (bool): Whether to transpose :math:`x` before multiplication.
        transpose_y (bool): Whether to transpose :math:`y` before multiplication.
        name(str|None): For detailed information, please refer to
            :ref:`api_guide_Name` . Usually there is no need to set it; it is
            None by default.

    Returns:
        Tensor: the output Tensor.

    Examples:
        .. code-block:: python

            # required: gpu
            import paddle
            from paddle.incubate.nn.functional import fused_matmul_bias

            x = paddle.randn([3, 4])
            y = paddle.randn([4, 5])
            bias = paddle.randn([5])
            out = fused_matmul_bias(x, y, bias)
            print(out.shape) # [3, 5]
    """
    # Nothing to fuse without a bias: defer to the plain matmul.
    if bias is None:
        return matmul(
            x,
            y,
            transpose_x=transpose_x,
            transpose_y=transpose_y,
            name=name,
        )

    if not _non_static_mode():
        # Static graph: locals() is captured before any extra local is bound,
        # so the helper receives exactly the function arguments.
        helper = LayerHelper('fused_matmul_bias', **locals())
        result = helper.create_variable_for_type_inference(dtype=x.dtype)
        op_inputs = {'X': x, 'Y': y, 'Bias': bias}
        op_attrs = {'trans_x': transpose_x, 'trans_y': transpose_y}
        helper.append_op(
            type='fused_gemm_epilogue',
            inputs=op_inputs,
            outputs={'Out': result},
            attrs=op_attrs,
        )
        return result

    # Non-static mode: call the op directly; attributes are passed as
    # alternating name/value positional arguments.
    return _legacy_C_ops.fused_gemm_epilogue(
        x,
        y,
        bias,
        'trans_x',
        transpose_x,
        'trans_y',
        transpose_y,
    )


def fused_linear(x, weight, bias=None, transpose_weight=False, name=None):
    """
    Fully-connected linear transformation operator. This method requires CUDA version >= 11.6.

    This is a thin wrapper around ``fused_matmul_bias``: ``x`` is never
    transposed, and ``transpose_weight`` is forwarded as the transpose flag
    of the second operand.

    Args:
        x (Tensor): the input Tensor to be multiplied.
        weight (Tensor): the weight Tensor to be multiplied. Its rank must be 2.
        bias (Tensor|None): the input bias Tensor. If it is None, no bias addition
            is performed. Otherwise, the bias is added to the matrix
            multiplication result.
        transpose_weight (bool): Whether to transpose :math:`weight` before multiplication.
        name(str|None): For detailed information, please refer to
            :ref:`api_guide_Name` . Usually there is no need to set it; it is
            None by default.

    Returns:
        Tensor: the output Tensor.

    Examples:
        .. code-block:: python

            # required: gpu
            import paddle
            from paddle.incubate.nn.functional import fused_linear

            x = paddle.randn([3, 4])
            weight = paddle.randn([4, 5])
            bias = paddle.randn([5])
            out = fused_linear(x, weight, bias)
            print(out.shape) # [3, 5]
    """
    return fused_matmul_bias(
        x,
        weight,
        bias=bias,
        transpose_x=False,
        transpose_y=transpose_weight,
        name=name,
    )
